#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Kernel: stores the sum of `a` and `b` into the int that `c` points to.
//
// No thread or block index is consulted, so every launched thread writes the
// same value to the same location. `c` is dereferenced in device code and
// must therefore be a pointer the device can write through.
__global__ void add (int a, int b, int *c) {
	const int sum = a + b;
	c[0] = sum;
}

// Reports a failed CUDA runtime call on stderr.
//
// err   status returned by the call being checked
// what  short label for the call, used in the diagnostic
//
// Returns 1 when `err` is cudaSuccess, 0 otherwise.
static int checkCuda (cudaError_t err, const char *what) {
	if (err == cudaSuccess) {
		return 1;
	}
	fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
	return 0;
}

// Launches the `add` kernel with one block of one thread to compute 2 + 7 on
// the device, copies the result back to the host and prints it.
//
// Every CUDA runtime call is checked; on any failure a diagnostic is written
// to stderr and the program exits with EXIT_FAILURE instead of printing an
// indeterminate result. The command-line arguments are not used.
int main (int argc, char* argv[]) {
	(void)argc;
	(void)argv;

	printf("Hello from your GPU!\n");

	int c = 0;
	int *dev_c = NULL;
	int status = EXIT_FAILURE;

	if (checkCuda(cudaMalloc<int>(&dev_c, sizeof(int)), "cudaMalloc")) {
		add<<<1, 1>>>(2, 7, dev_c);

		// A kernel launch returns no status itself: configuration errors are
		// picked up with cudaGetLastError(), and the blocking cudaMemcpy
		// below reports any fault raised while the kernel was running.
		if (checkCuda(cudaGetLastError(), "add kernel launch") &&
		    checkCuda(cudaMemcpy(&c, dev_c, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy")) {
			printf("2 + 7 = %d\n", c);
			status = EXIT_SUCCESS;
		}

		// Release the device buffer on both the success and the failure path.
		if (!checkCuda(cudaFree(dev_c), "cudaFree")) {
			status = EXIT_FAILURE;
		}
	}

	// Wait for a key press so the console output stays visible.
	getchar();
	return status;
}